package data_deepprocessing.algorithm.ssvm.ssvm_util;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.deeplearning4j.models.word2vec.Word2Vec;

import data_deepprocessing.algorithm.word_embedding.service.YYH_Word2VectorService;
import data_deepprocessing.prepareData.beans.XianBingShi_new_zhangBean;
import data_deepprocessing.prepareData.db.InitSeedDB;
import data_deepprocessing.prepareData.db.XianBingShi_New_zhangDB;
import data_deepprocessing.util.FileUtil;

/** 
* @author  作者 : YUHU YUAN
* @date 创建时间：2017年3月31日 下午10:27:31 
* @version 1.0  
* 1:得到List<XianBingSHiBean> list；
* 2:得到分词后的结果；
* 3：通过word embedding 的到vector和 model文件
* 4：利用list中的信息何vector和model文件生成 ssvm要用的配置文件
* 5：制作训练集和测试集的数据
* 6：调用算法准备开始试验。
*	现在试验设计和实验方法部分就好写了吧，这部分的内容好好的组织一下 
*   
*	注释：XBS均指XianBingShi
*/

public class SsvmService2CrossValidService {
	
	private InitSeedDB initSeedDB;
	private XianBingShi_New_zhangDB xianBingShi_New_zhangDB;
	
	//这里先自己找一部分 然后再实验一下 十重交叉验证，还有现在到底能不能用还不能确定
	//希望生成的数据集没有问题把，现在先全部的训练一遍
	private List<XianBingShi_new_zhangBean> doGetSegmentedXianBingShi(){
		return xianBingShi_New_zhangDB.selectALlXianBingShi();
	}
	
	private Word2Vec doGetWord2Vec(){
		
		try {
			return YYH_Word2VectorService.getWord2Vec();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		return null;
	}
	
	private List<String> doGetSymptom_Dictionary(){
		return initSeedDB.selectAllSeedContent();
	}
	
	private void divideDataset(){
		List<XianBingShi_new_zhangBean> beanList = doGetSegmentedXianBingShi();
		while (!beanList.isEmpty()) {
			int size = beanList.size();
			int index = (int) (Math.random() * size);
			XianBingShi_new_zhangBean tmp = beanList.get(index);
			divided_dataset.get(size % 10).add(tmp);
			beanList.remove(index);
		}
		
	}
	
	
	private Map<Integer, List<XianBingShi_new_zhangBean>> divided_dataset = initMap();
	
	private Map<Integer, List<XianBingShi_new_zhangBean>> initMap(){
		Map<Integer, List<XianBingShi_new_zhangBean>> divided_dataset = new HashMap<>();
		for (int i = 0; i < 10; i++) {
			List<XianBingShi_new_zhangBean> fold = new ArrayList<XianBingShi_new_zhangBean>();
			divided_dataset.put(i, fold);
		}
		return divided_dataset;
	}
	
	
	/** 
	* @author  作者 : YUHU YUAN
	* @date 创建时间：2017年4月9日 下午10:37:03 
	* @parameter 
	* @return
	* @throws
	*/
	private void doGenerateSsvmDataSet2Train(String path,List<XianBingShi_new_zhangBean> xianbingshiBeanList,Word2Vec word2Vec) throws IOException{
		BufferedWriter writer = FileUtil.getWriter(path);
		List<String> symptom_dictionary = doGetSymptom_Dictionary();//这里换成map会不会查找的更快
		StringBuffer constructTerm = new StringBuffer();
		int qid = 1;//SVM程序要求必须从1开始递增
		for(XianBingShi_new_zhangBean bean : xianbingshiBeanList){
			String segmentedXBS = bean.getContent_segmented();
			if(segmentedXBS.length()<20){
				continue;
			}
			String[] wordOrPhrases = segmentedXBS.trim().split("[\\s]+");
			for(String word:wordOrPhrases){
				if(word==null && word.equals("")&&word.trim().equals("")){
					continue;
				}
				double[] voctor = word2Vec.getWordVector(word);
				String tag = (symptom_dictionary.contains(word)?1:2)+"";
				constructTerm.append(tag).append(" ");
				constructTerm.append("qid:").append(qid).append(" ");
				if(voctor!=null ){
					
					for(int i=0; i<voctor.length; ++i){
						constructTerm.append((i+1)+":"+voctor[i]).append(" ");
					}
				}
				constructTerm.append("#").append(word);
				writer.write(constructTerm.toString());
				writer.flush();
				writer.newLine();
				constructTerm.setLength(0);
			}
			++qid;
		}
		if(writer!=null){
			writer.close();
		}
		
	}
	
	private void doGenerateSsvmDataSet2Test(String path,String path2Judge,List<XianBingShi_new_zhangBean> xianbingshiBeanList,Word2Vec word2Vec) throws IOException{
		BufferedWriter writer = FileUtil.getWriter(path);
		BufferedWriter writer2judge = FileUtil.getWriter(path2Judge);
		List<String> symptom_dictionary = doGetSymptom_Dictionary();//这里换成map会不会查找的更快
		StringBuffer constructTerm = new StringBuffer();
		StringBuffer constructTerm2judge = new StringBuffer();
		int qid = 1;   //这里主要是SVM程序要求必须从1开始，这样就必须自己设计
		for(XianBingShi_new_zhangBean bean : xianbingshiBeanList){
			String segmentedXBS = bean.getContent_segmented();
			if(segmentedXBS.length()<20){
				continue;
			}
			String[] wordOrPhrases = segmentedXBS.trim().split("[\\s]+");
			for(String word:wordOrPhrases){
				if(word==null && word.equals("")&&word.trim().equals("")){
					continue;
				}
				double[] voctor = word2Vec.getWordVector(word);
				String tag = (symptom_dictionary.contains(word)?1:2)+"";
				constructTerm.append(tag).append(" ");
				constructTerm.append("qid:").append(qid).append(" ");
				constructTerm2judge.append(tag).append(" ");
				constructTerm2judge.append("qid:").append(bean.getId()).append(" ")
				.append(qid).append(" ");
				if(voctor!=null ){
					
					for(int i=0; i<voctor.length; ++i){
						constructTerm.append((i+1)+":"+voctor[i]).append(" ");
					}
				}
				constructTerm.append("#").append(word);
				constructTerm2judge.append("#").append(word);
				writer.write(constructTerm.toString());
				writer.flush();
				writer.newLine();
				writer2judge.write(constructTerm2judge.toString());
				writer2judge.flush();
				writer2judge.newLine();
				constructTerm.setLength(0);
				constructTerm2judge.setLength(0);
			}
			++qid;
		}
		if(writer!=null){
			writer.close();
		}
		if(writer2judge!=null){
			writer2judge.close();
		}
		
	}
	
	
	private static String path = "D:\\yyh_yuanyuhu_graduation_experimental\\SSVM\\dataset\\";
	
	public void doGenerateSsvmCrossValidDataSet(){
		divideDataset();
		Word2Vec word2Vec = doGetWord2Vec();
		
		for(int i=0 ; i<divided_dataset.size(); ++i){
			StringBuffer testPath = new StringBuffer(path).append("data")
					.append(i).append(File.separator).append("test")
					.append(File.separator).append("test.txt");
			StringBuffer testPath2Judge = new StringBuffer(path).append("data")
					.append(i).append(File.separator).append("test")
					.append(File.separator).append("test2Judge.txt");
			List<XianBingShi_new_zhangBean> testDataSet = divided_dataset.get(i);
			try {
				doGenerateSsvmDataSet2Test(testPath.toString(),testPath2Judge.toString(),testDataSet, word2Vec);
			} catch (IOException e) {
				// TODO Auto-generated catch block
				e.printStackTrace();
			}
			StringBuffer trainPath = new StringBuffer(path).append("data")
					.append(i).append(File.separator).append("train")
					.append(File.separator).append("train.dat");
			List<XianBingShi_new_zhangBean> trainDataSet = new ArrayList<>();
			for(int j=0; j<divided_dataset.size(); ++j){
				if(j!=i){
					trainDataSet.addAll(divided_dataset.get(j));
				}
			}
			try {
				doGenerateSsvmDataSet2Train(trainPath.toString(),trainDataSet, word2Vec);
			} catch (IOException e) {
				// TODO Auto-generated catch block
				e.printStackTrace();
			}
			testPath.setLength(0);
			testPath2Judge.setLength(0);
			trainPath.setLength(0);
			
		}
		
	}

	public InitSeedDB getInitSeedDB() {
		return initSeedDB;
	}

	public void setInitSeedDB(InitSeedDB initSeedDB) {
		this.initSeedDB = initSeedDB;
	}

	public XianBingShi_New_zhangDB getXianBingShi_New_zhangDB() {
		return xianBingShi_New_zhangDB;
	}

	public void setXianBingShi_New_zhangDB(XianBingShi_New_zhangDB xianBingShi_New_zhangDB) {
		this.xianBingShi_New_zhangDB = xianBingShi_New_zhangDB;
	}
	
	
	
	
}















